Importing data and packages

library(sf)
library(raster)
library(tidyverse)
library(magrittr)
# Read the 2019 crime records into a tibble with explicit column types.
# The timestamps carry an AM/PM marker (%p), so the hour is parsed with %I
# (12-hour clock, 1-12) rather than %H (24-hour clock); readr documents %I as
# the specifier to pair with %p, and %I rejects out-of-range hours instead of
# silently accepting them.
Crime_2019 <- read_csv(
  "data/crime-2019.csv",
  col_types = list(
    ID = col_integer(),
    `Case Number` = col_character(),
    Date = col_datetime(format = "%m/%d/%Y %I:%M:%S %p"),
    Block = col_factor(),
    IUCR = col_factor(),
    `Primary Type` = col_factor(),
    Description = col_factor(),
    `Location Description` = col_factor(),
    Arrest = col_logical(),
    Domestic = col_logical(),
    Beat = col_factor(),
    District = col_factor(),
    Ward = col_integer(),
    `Community Area` = col_integer(),
    # Previously left to type guessing; stated explicitly so every column's
    # type is fixed by this specification.
    `FBI Code` = col_character(),
    `X Coordinate` = col_double(),
    `Y Coordinate` = col_double(),
    Year = col_integer(),
    `Updated On` = col_datetime(format = "%m/%d/%Y %I:%M:%S %p"),
    Latitude = col_double(),
    Longitude = col_double(),
    Location = col_character()
  )
)
# Keep only complete records: na.omit() drops every row that has an NA in any
# column. as_tibble() is the coercion function for an existing data frame
# (tibble() is the constructor for building one from column vectors).
Crime_NA <- as_tibble(na.omit(Crime_2019))
# Show the unfiltered data for reference.
Crime_2019
## # A tibble: 260,427 x 22
##        ID `Case Number` Date                Block IUCR  `Primary Type`
##     <int> <chr>         <dttm>              <fct> <fct> <fct>         
##  1 1.22e7 JD399576      2019-12-07 12:33:00 066X… 1544  SEX OFFENSE   
##  2 1.19e7 JC535200      2019-12-04 06:00:00 032X… 1563  SEX OFFENSE   
##  3 1.18e7 JC358770      2019-07-21 18:35:00 041X… 041A  BATTERY       
##  4 1.22e7 JD439375      2019-11-23 00:00:00 050X… 0265  CRIMINAL SEXU…
##  5 1.22e7 JD439346      2019-11-11 23:59:00 050X… 1544  SEX OFFENSE   
##  6 1.22e7 JD439529      2019-12-20 23:30:00 025X… 2825  OTHER OFFENSE 
##  7 1.21e7 JD272123      2019-01-17 00:00:00 075X… 1750  OFFENSE INVOL…
##  8 1.17e7 JC338476      2019-07-07 11:45:00 008X… 041A  BATTERY       
##  9 1.17e7 JC317897      2019-06-23 00:40:00 049X… 0281  CRIMINAL SEXU…
## 10 1.19e7 JC529798      2019-11-29 17:30:00 065X… 0281  CRIMINAL SEXU…
## # … with 260,417 more rows, and 16 more variables: Description <fct>, `Location
## #   Description` <fct>, Arrest <lgl>, Domestic <lgl>, Beat <fct>,
## #   District <fct>, Ward <int>, `Community Area` <int>, `FBI Code` <chr>, `X
## #   Coordinate` <dbl>, `Y Coordinate` <dbl>, Year <int>, `Updated On` <dttm>,
## #   Latitude <dbl>, Longitude <dbl>, Location <chr>
# Strip spaces from the column names (e.g. `Case Number` -> CaseNumber) so the
# columns can be referenced without backticks. gsub() is vectorized and returns
# a character vector, which is what colnames<- expects (the strsplit/lapply
# route produced a list and relied on implicit coercion).
colnames(Crime_NA) <- gsub(" ", "", colnames(Crime_NA), fixed = TRUE)
head(Crime_NA)
## # A tibble: 6 x 22
##       ID CaseNumber Date                Block IUCR  PrimaryType Description
##    <int> <chr>      <dttm>              <fct> <fct> <fct>       <fct>      
## 1 1.22e7 JD399576   2019-12-07 12:33:00 066X… 1544  SEX OFFENSE SEXUAL EXP…
## 2 1.19e7 JC535200   2019-12-04 06:00:00 032X… 1563  SEX OFFENSE CRIMINAL S…
## 3 1.18e7 JC358770   2019-07-21 18:35:00 041X… 041A  BATTERY     AGGRAVATED…
## 4 1.21e7 JD272123   2019-01-17 00:00:00 075X… 1750  OFFENSE IN… CHILD ABUSE
## 5 1.17e7 JC338476   2019-07-07 11:45:00 008X… 041A  BATTERY     AGGRAVATED…
## 6 1.17e7 JC317897   2019-06-23 00:40:00 049X… 0281  CRIMINAL S… NON-AGGRAV…
## # … with 15 more variables: LocationDescription <fct>, Arrest <lgl>,
## #   Domestic <lgl>, Beat <fct>, District <fct>, Ward <int>,
## #   CommunityArea <int>, FBICode <chr>, XCoordinate <dbl>, YCoordinate <dbl>,
## #   Year <int>, UpdatedOn <dttm>, Latitude <dbl>, Longitude <dbl>,
## #   Location <chr>
# Read the community-area polygons and order them by numeric area number,
# then preview the first three features.
com_bounds <- st_read("data/community_areas.shp", quiet = TRUE) %>%
  arrange(as.integer(area_numbe))
print(com_bounds, n = 3)
## Simple feature collection with 77 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## geographic CRS: WGS84(DD)
## First 3 features:
##   area area_num_1 area_numbe comarea comarea_id   community perimeter
## 1    0          1          1       0          0 ROGERS PARK         0
## 2    0          2          2       0          0  WEST RIDGE         0
## 3    0          3          3       0          0      UPTOWN         0
##   shape_area shape_len                       geometry
## 1   51259902  34052.40 MULTIPOLYGON (((-87.65456 4...
## 2   98429095  43020.69 MULTIPOLYGON (((-87.68465 4...
## 3   65095643  46972.79 MULTIPOLYGON (((-87.64102 4...
# Draw only the community-area outlines (geometry column, no attributes).
com_bounds %>%
  st_geometry() %>%
  plot()

Area numbers in com_bounds correspond to CommunityArea in Crime_NA, making plotting maps easier.

# Count the records in each community area and store the result as a
# two-column data frame (CommunityArea, Freq).
# NOTE(review): the counts come from Crime_NA, i.e. after na.omit(), so rows
# with any missing field are not included.
Crime_summary <- setNames(
  as.data.frame(table(Crime_NA$CommunityArea)),
  c("CommunityArea", "Freq")
)
head(Crime_summary)
##   CommunityArea Freq
## 1             1 3991
## 2             2 3416
## 3             3 3283
## 4             4 1761
## 5             5 1238
## 6             6 5857
# Attach the per-area counts to the polygons by matching on area number rather
# than by row position: a positional copy is only correct when every area
# appears in Crime_summary and both objects are in the same order.
# as.character() first so the conversion is right whether the id columns are
# stored as factor, character or numeric.
area_idx <- match(
  as.integer(as.character(com_bounds$area_numbe)),
  as.integer(as.character(Crime_summary$CommunityArea))
)
area_freq <- Crime_summary$Freq[area_idx]
# An area that is absent from the frequency table has no counted records.
area_freq[is.na(area_freq)] <- 0L
com_bounds$crime_summary <- area_freq
plot(com_bounds["crime_summary"], main = "Crime frequency by community area")

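The map is less detailed as it only includes community areas, but its construction is easier and doesn’t require longitude and latitude coordinates (which are often missing). Note, however, that the counts above are computed from Crime_NA, which was created with na.omit(), so records with missing coordinates are still excluded here; counting from the unfiltered data would be needed to include them.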

Use geom_sf to make a ggplot. More details at: https://r-spatial.github.io/sf/articles/sf5.html and https://www.r-spatial.org/r/2018/10/25/ggplot2-sf-2.html

For colours I like to use colorspace: https://cran.r-project.org/web/packages/colorspace/vignettes/colorspace.html

# Choropleth of crime counts per community area.
# - `text = community` is not a geom_sf aesthetic; it is carried along so the
#   plotly conversion further down can use it as the hover tooltip.
# - Only one fill scale is defined. The earlier
#   colorspace::scale_fill_continuous_sequential() call was immediately
#   replaced by scale_fill_gradientn(), so it had no effect beyond a
#   "scale already present" message.
# - The Inferno palette is reversed so that low counts are light and high
#   counts are dark.
# NOTE(review): limits are fixed at 0-15000; confirm no area exceeds this,
# as values outside the limits would not be mapped to a palette colour.
p <- ggplot(com_bounds) +
  geom_sf(aes(fill = crime_summary, text = community)) +
  ggtitle("Crime frequency by community area") +
  theme_void() +
  guides(fill = guide_colorbar(title = "Frequency")) +
  scale_fill_gradientn(
    colors = rev(colorspace::sequential_hcl(5, palette = "Inferno")),
    breaks = seq(0, 15000, 2500),
    limits = c(0, 15000)
  )
p

Trying out interactive plots with plotly.

library(plotly)
# Turn the ggplot into an interactive widget: the tooltip shows the `text`
# aesthetic, hover labels get a white background, and hovering is triggered
# by the filled area of each polygon.
style(
  ggplotly(p, tooltip = "text"),
  hoverlabel = list(bgcolor = "white"),
  hoveron = "fill"
)